#!/usr/bin/perl -w
#
# hls-fetch - Download and decrypt HTTP Live Streaming videos.
# Copyright (C) 2012 Oskar Liljeblad
# Copyright (C) 2015, 2016 Desktopd Project
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#

use strict;
use Getopt::Long;
use HTML::Parser;
use LWP::UserAgent;
use JSON;
use File::Temp qw(tempfile);
use URI::URL;
use constant READ_SIZE => 1024;

my %opt = ('bandwidth' => 'max', 'user-agent' => 'Mozilla/5.0 (compatible; hls-fetch/1.0)');
Getopt::Long::GetOptions(\%opt, 'embedded', 'svtplay', 'playlist', 'output|o=s', 'bandwidth|b=s', 'quiet|q', 'force|f', 'verbose|v', 'no-decrypt', 'retry', 'user-agent=s', 'resume-at=i', 'version', 'help') || exit 1;

if ($opt{'version'}) {
  print "hls-fetch 0.2\n";
  print "Copyright (C) 2012 Oskar Liljeblad\n";
  print "Copyright (C) 2015-2016 Desktopd Project\n";
  print "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.\n";
  print "This is free software: you are free to change and redistribute it.\n";
  print "There is NO WARRANTY, to the extent permitted by law.\n\n";
  print "Written by Oskar Liljeblad.\n";
  exit 0;
}
if ($opt{'help'}) {
  print "Usage: hls-fetch [OPTION]... URL\n";
  print "Download and decrypt videos served by the HTTP Live Streaming (HLS) protocol.\n\n";
  print "      --embedded        URL refers to a page with <video> tag (default)\n";
  print "      --playlist        URL refers to an M3U (m3u8) playlist\n";
  print "      --svtplay         URL refers to an SVT Play page (svtplay.se)\n";
  print "  -o, --output=FILE     save video to FILE rather than \"video.ts\"\n";
  print "  -f, --force           force overwriting existing output file\n";
  print "  -b, --bandwidth=SPEC  pick video with specified bandwidth (bits/s),\n";
  print "                        lowest (\"min\") or highest (\"max\") (default max)\n";
  print "  -v, --verbose         explain what is being done\n";
  print "  -q, --quiet           no output other than errors\n";
  print "      --no-decrypt      skip decryption even if stream should be decrypted\n";
  print "      --retry           retry connection if a network problem is detected\n";
  print "      --user-agent=UA   use this string as HTTP user agent\n";
  print "      --resume-at=N     resume at the Nth segment\n";
  print "      --help            display this help and exit\n";
  print "      --version         output version information and exit\n";
  print "\nDecryption requires openssl.\n";
  print "\nReport bugs to Oskar Liljeblad <oskar\@osk.mine.nu>.\n";
  exit 0;
}

die "--embedded, --playlist and --svtplay are mutually exclusive\n" if (scalar grep { defined } @opt{'svtplay', 'playlist', 'embedded'}) > 1;
$opt{'embedded'} = 1 if !$opt{'playlist'} && !$opt{'svtplay'};
die "--verbose and --quiet are mutually exclusive\n" if $opt{'verbose'} && $opt{'quiet'};
die "non-numeric --bandwidth specified\n" if $opt{'bandwidth'} !~ /^(min|max|\d+)$/;
die "missing URL operand\n" if !@ARGV;
if (!exists $opt{'output'}) {
  $opt{'output'} = 'video.ts';
  warn "no output file specified, assuming video.ts\n" if !$opt{'quiet'};
}

# switch statement
# Mimicking iOS magically allows successful downloads in many cases
while (1) {
  if ("ipad" eq $opt{'user-agent'}) {
    $opt{'user-agent'} = 'Mozilla/5.0 (iPad; CPU OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1';
    last;
  }
  
  if ("iphone" eq $opt{'user-agent'}) {
    $opt{'user-agent'} = 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1';
    last;
  }
  
  last;
}

my ($url) = @ARGV;
my $browser = LWP::UserAgent->new;
$browser->cookie_jar({});
$browser->agent($opt{'user-agent'});

my $video_file = $opt{'output'};
die "$video_file: file exists, not overwriting without -f/--force\n" if !$opt{'force'} && -e $video_file;
open(my $video_fh, '>', $video_file) || die "$video_file: cannot open file: $!\n";

if ($opt{'svtplay'}) {
  my $data = eval { fetch_url($url) }; die "$url: cannot fetch page: $@" if $@;
  my $parser = HTML::Parser->new(api_version => 3, start_h => [\&handle_svtplay_tag, 'tagname,@attr']);
  my ($json_path, $json_title);

  sub handle_svtplay_tag {
    my ($tag, %attr) = @_;
    if ($tag eq 'a' && exists $attr{'id'} && $attr{'id'} =~ /^player($|_)/ && exists $attr{'data-json-href'} && exists $attr{'data-title'}) {
      $json_path = $attr{'data-json-href'};
      $json_title = $attr{'data-title'};
    }
  }

  $parser->parse($data);
  die "$url: cannot find any video on page\n" if !defined $json_path;
  print "Title: $json_title\n" if $opt{'verbose'};

  my $json_url = url($json_path, $url)->abs;
  $json_url->query('output=json');
  $json_url = $json_url->as_string();
  print "URL (JSON): $json_url\n" if $opt{'verbose'};

  $data = eval { fetch_url($json_url) }; die "$json_url: cannot fetch JSON data: $@" if $@;
  my $json = decode_json($data) // die "$json_url: cannot parse JSON data\n";
  die "$json_url: invalid JSON data\n" if !exists $json->{'video'}->{'videoReferences'}->[0];
  ($url) = map { $_->{'url'} } grep { $_->{'playerType'} eq 'ios' } @{$json->{'video'}->{'videoReferences'}};
  die "$json_url: missing video URL for 'ios' type in JSON data\n" if !defined $url;
  print "URL (master): $url\n" if $opt{'verbose'};
}
elsif ($opt{'embedded'}) {
  print "URL (embedded): $url\n" if $opt{'verbose'};
  my $data = eval { fetch_url($url) }; die "$url: cannot fetch page: $@" if $@;
  my $parser = HTML::Parser->new(api_version => 3, start_h => [\&handle_playlist_tag, 'tagname,@attr']);
  my $index_url;

  sub handle_playlist_tag {
    my ($tag, %attr) = @_;
    if ($tag eq 'video') {
      $index_url = $attr{'src'} if exists $attr{'src'};
    } elsif ($tag eq 'source') {
      $index_url = $attr{'src'} if exists $attr{'src'} && exists $attr{'type'} && $attr{'type'} eq 'application/vnd.apple.mpegurl';
    }
  }

  $parser->parse($data);
  if (((!defined $index_url) || "" eq $index_url) && $data =~ /(https?:.+\.m3u8)/) {
    $index_url = $1;
  }
  die "$url: cannot find any video on page\n" if !defined $index_url;
  $url = url($index_url, $url)->abs()->as_string();
  print "URL (master): $url\n" if $opt{'verbose'};
}

my $data = eval { fetch_url($url) }; die "$url: cannot fetch playlist: $@" if $@;
my @lines = split(/\r*\n|\r\n*/, $data);
die "$url: invalid playlist, no header\n" if @lines < 1 || $lines[0] ne '#EXTM3U';

if (!grep { /^#EXTINF:/ } @lines) {
  my (@streams, $last_stream);
  foreach my $line (@lines) {
    if ($line =~ /^#EXT-X-STREAM-INF:(.*)$/) {
      $last_stream = { parse_m3u_attribs($url, $1) };
      push @streams, $last_stream;
    } elsif ($line !~ /^#EXT/) {
      die "$url: missing #EXT-X-STREAM-INF for URL: $line\n" if !defined $last_stream;
      $last_stream->{'URL'} = $line;
      $last_stream = undef;
    }
  }
  die "$url: no streams found in playlist\n" if !@streams;

  warn "$url: non-numeric bandwidth in playlist\n" if grep { $_->{'BANDWIDTH'} =~ /\D/ } @streams;
  my @bandwidths = sort { $a <=> $b } grep { /^\d+$/ } map { $_->{'BANDWIDTH'} } @streams;
  print "Bandwidths: ", join(', ', @bandwidths), "\n" if $opt{'verbose'};
  my $stream;
  if ($opt{'bandwidth'} eq 'min') {
    ($stream) = grep { $_->{'BANDWIDTH'} == $bandwidths[0] } @streams;
  } elsif ($opt{'bandwidth'} eq 'max') {
    ($stream) = grep { $_->{'BANDWIDTH'} == $bandwidths[-1] } @streams;
  } else {
    ($stream) = grep { $opt{'bandwidth'} == $_->{'BANDWIDTH'} } @streams;
    die "$url: no streams with bandwidth $opt{'bandwidth'} in playlist\n" if !defined $stream;
  }
  print "Bandwidth (selected): $stream->{'BANDWIDTH'}\n" if $opt{'verbose'};
  $url = url($stream->{'URL'}, $url)->abs()->as_string();
  print "URL (index): $url\n" if $opt{'verbose'};

  $data = eval { fetch_url($url) }; die "$url: cannot fetch playlist: $@" if $@;
  @lines = split(/\r?\n/, $data);
  die "$url: invalid playlist, no header\n" if @lines < 1 || $lines[0] ne '#EXTM3U';
}

my $sequence = 0;
my (%segments, $cryptkey_url);
foreach my $line (@lines) {
  if ($line =~ /^#EXT-X-MEDIA-SEQUENCE:(\d+)$/) {
    $sequence = $1;
    print "First sequence number: $sequence\n" if $opt{'verbose'};
  } elsif ($line =~ /^#EXT-X-KEY:(.*)$/) {
    my %attr = parse_m3u_attribs($url, $1);
    die "$url: unsupported encryption method $attr{'METHOD'} in playlist\n" if exists $attr{'METHOD'} && $attr{'METHOD'} ne 'AES-128';
    $cryptkey_url = $attr{'URI'};
    die "$url: missing encryption key URI in playlist\n" if !defined $cryptkey_url;
  } elsif ($line !~ /^#EXT/) {
    $segments{$sequence} = { 'url' => $line, 'cryptkey_url' => $cryptkey_url };
    $sequence++;
  }
}
die "$url: no segments in playlist\n" if !scalar keys %segments;

my %cryptkeys;
#my $cryptkey;
#if (defined $cryptkey_url) {
#  print "URL (key): $cryptkey_url\n" if $opt{'verbose'};
#  $cryptkey = eval { fetch_url($cryptkey_url) }; die "$cryptkey_url: cannot fetch encryption key: $@" if $@;
#  $cryptkey = join('', map { sprintf('%02x', ord) } split(//, $cryptkey));
#  print "Key: $cryptkey\n" if $opt{'verbose'};
#}
print "Segments: ", scalar keys %segments, "\n" if $opt{'verbose'};

$| = 1;
foreach my $sequence (sort { $a <=> $b } keys %segments) {
  if ($opt{'resume-at'} && $sequence < $opt{'resume-at'}) {
     next;
  }
  
  my $segment = $segments{$sequence};
  my $segment_url = url($segment->{'url'}, $url)->abs()->as_string();
  print "URL (segment $sequence/", scalar keys %segments, "): $segment_url\n" if $opt{'verbose'};
  printf "\r%d/%d", $sequence, scalar keys %segments if !$opt{'quiet'} && !$opt{'verbose'};

  if (!$opt{'no-decrypt'} && defined $segment->{'cryptkey_url'} && !exists $cryptkeys{$segment->{'cryptkey_url'}}) {
    print "URL (key): ", $segment->{'cryptkey_url'}, "\n" if $opt{'verbose'};
    my $cryptkey = eval { fetch_url($segment->{'cryptkey_url'}) };
    die "$segment->{'cryptkey_url'}: cannot fetch encryption key: $@" if $@;
    $cryptkey = join('', map { sprintf('%02x', ord) } split(//, $cryptkey));
    print "Key: $cryptkey\n" if $opt{'verbose'};
    $cryptkeys{$segment->{'cryptkey_url'}} = $cryptkey;
  }

  my ($segment_fh, $segment_file) = tempfile();
  close $segment_fh;
  eval {
    eval { fetch_url($segment_url, $segment_file) }; die "$segment_url: cannot not fetch segment: $@" if $@;
    if (!$opt{'no-decrypt'} && defined $segment->{'cryptkey_url'}) {
      my ($decrypt_fh, $decrypt_file) = tempfile();
      close $decrypt_fh;
      my $iv = sprintf('%032x', $sequence);
      my @cmd = ('openssl', 'aes-128-cbc', '-d', '-in', $segment_file, '-out', $decrypt_file, '-K', $cryptkeys{$segment->{'cryptkey_url'}}, '-iv', $iv);
      system @cmd;
      unlink $segment_file || warn "$segment_file: cannot remove file: $!\n";
      $segment_file = $decrypt_file;
      die "$segment_file: openssl failed (status $?)\n" if $? != 0;
    }
    open ($segment_fh, '<', $segment_file) || die "$segment_file: cannot open file: $!\n";
    for (;;) {
      my $size = sysread($segment_fh, $data, READ_SIZE);
      die "$segment_file: cannot read from file: $!\n" if !defined $size;
      last if $size == 0;
      die "$video_file: cannot write to file: $!\n" if !defined syswrite($video_fh, $data);
    }
    close $segment_fh;
  };
  unlink $segment_file || warn "$segment_file: cannot remove file: $!\n";
  die $@ if $@;
}
close $video_fh;

sub parse_m3u_attribs {
  my ($url, $attr_str) = @_;
  my %attr;
  for (my $as = $attr_str; $as ne ''; ) {
    $as =~ s/^?([^=]*)=([^,"]*|"[^"]*")\s*(,\s*|$)// or die "$url: invalid attributes in playlist: $attr_str\n";
    my ($key, $val) = ($1, $2);
    $val =~ s/^"(.*)"$/$1/;
    $attr{$key} = $val;
  }
  return %attr;
}

sub fetch_url {
  my ($url, $filename) = @_;
  my $max_retries = 10;
  my $retry_delay = 2;
  my $retry_delay_factor = 2;
  my $count = 0;
  while (1) {
    my $response;
    if (defined $filename) {
      $response = $browser->get($url, ":content_file" => $filename);
      #die $response->status_line(), "\n" if !$response->is_success;
      return undef if $response->is_success;
    } else {
      $response = $browser->get($url);
      #die $response->status_line(), "\n" if !$response->is_success;
      return $response->decoded_content() if $response->is_success;
    }
    
    if (500 != $response->code && 503 != $response->code || $response->message =~ /Server Error/i || (!$opt{'retry'})) {
      # Server explicitly failed, should not blindly retry
      die $response->status_line(), "\n";
    }
    
    print $response->status_line(), "\n" if !$opt{'quiet'};
    # Nasty connection problems, should retry
    if ($count < $max_retries) {
      print "Sleeping for $retry_delay secs before retry\n" if $opt{'verbose'};
      sleep $retry_delay;
      $retry_delay = $retry_delay * $retry_delay_factor;
      $count++;
      print "Retrying HTTP request: $url\n" if $opt{'verbose'};
      next;
    }
    
    die "Maximum number of retries reached: is the resource available at the specified URI?\n";
  }
}
